## estimates fraction of treatment and control group accepted to conference,
## along with p-values from the test of the null of no effect whatsoever.
## Note SATE, and ATE are not well-defined here b/c of the fact that
## there are fixed number of conference slots and all of them must be filled.
## This creates a SUTVA violation if Y(0) is not equal to Y(1) for all units.
##
## table of results has the following columns (in order):
##
## subgroup    frac. accepted    frac. accepted    p-value
##                  treated           control
##
##
## Cait Unkovic, Maya Sen, Kevin Quinn
##
## 12/16/2014
##

set.seed(271031)

nperm <- 5001

## utility functions

## stratum specific estimates of ATE
ATE.s <- function(data, outcome="accept"){
  sex.vals <- unique(data$sex)
  univ.vals <- unique(data$univ)

  univ <- rep(NA, length(univ.vals)*2)
  sex <- rep(NA, length(univ.vals)*2)
  n.units <- rep(NA, length(univ.vals)*2)
  ATE.hat <- rep(NA, length(univ.vals)*2)

  counter <- 1
  for (u in univ.vals){
    for (s in sex.vals){
      data.sub <- data[data$univ == u & data$sex == s,]
      treated.data <- na.omit(data.sub[data.sub$treat==1,outcome])
      control.data <- na.omit(data.sub[data.sub$treat==0,outcome])
      n.treat <- length(treated.data)
      n.control <- length(control.data)
            
      univ[counter] <- u
      sex[counter] <- s
      n.units[counter] <- n.treat + n.control
      ATE.hat[counter] <- mean(treated.data) - mean(control.data)
      
      counter <- counter + 1
      }
  }
  
  return(data.frame(univ, sex, n.units, ATE.hat, stringsAsFactors=FALSE))
}


## ATE for full sample or subset of sample
##
## subset should either be NULL which correspons to the full sample or
##        a logical vector with TRUE values corresponding to university-sex
##        combinations to include the average and FALSE values corresponding
##        to university-sex combinations to exclude from the average.
ATE <- function(data, outcome="accept", subset=NULL){
  holder <- ATE.s(data, outcome)
  if (is.null(subset)){
    subset=rep(TRUE, nrow(holder))
  }
  ## error checking for subset
  if (length(subset) != nrow(holder)){
    stop("length of subset not equal to number of university-sex combinations")
  }
  if (!is.logical(subset)){
    stop("subset is not a logical vector")
  }
  holder <- holder[subset,]
  weights <- holder$n.units / sum(holder$n.units)
  estimate <- sum(holder$ATE.hat*weights)
  return(estimate)
}




## p-value for the test of null of no individual effect whatsoever 
##
## subset should either be NULL which correspons to the full sample or
##        a logical vector with TRUE values corresponding to university-sex
##        combinations to include the average and FALSE values corresponding
##        to university-sex combinations to exclude from the average.
get.perm.pval <- function(data, outcome="accept", M=5001, subset=NULL){
  holder <- ATE.s(data, outcome)
  if (is.null(subset)){
    subset=rep(TRUE, nrow(holder))
  }
  ## error checking for subset
  if (length(subset) != nrow(holder)){
    stop("length of subset not equal to number of university-sex combinations")
  }
  if (!is.logical(subset)){
    stop("subset is not a logical vector")
  }
  
  ATE.s.obs <- holder[subset,]
  ATE.obs <- ATE(data, outcome, subset)

  ATE.sim <- rep(NA, M)
  ATE.s.sim <- matrix(NA, M, nrow(ATE.s.obs))

  sex.vals <- unique(ATE.s.obs$sex)
  univ.vals <- unique(ATE.s.obs$univ)
  for (iter in 1:M){
    counter <- 1
    n.obs <- rep(NA, nrow(ATE.s.obs))
    for (u in univ.vals){
      for (s in sex.vals){
        data.sub <- data[data$univ == u & data$sex == s,] 
        n.treat <- sum(data.sub$treat==1)
        n.control <- sum(data.sub$treat==0)
        data.sub$treat <- sample(c(rep(1, n.treat), rep(0, n.control)),
                                 replace=FALSE)
        
        treated.data <- na.omit(data.sub[data.sub$treat==1,outcome])
        control.data <- na.omit(data.sub[data.sub$treat==0,outcome])
        n.treat <- length(treated.data)
        n.control <- length(control.data)
        n.obs[counter] <- n.treat + n.control

        ATE.s.sim[iter, counter] <- mean(treated.data) - mean(control.data)
        
        counter <- counter + 1
      }
    }
    weights <- n.obs / sum(n.obs)
    ATE.sim[iter] <- sum(ATE.s.sim[iter,]*weights)
  }

  pvals.s <- rep(NA, nrow(ATE.s.obs))
  for (i in 1:nrow(ATE.s.obs)){
    pvals.s[i] <- mean(abs(ATE.s.sim[,i]) >= abs(ATE.s.obs$ATE.hat[i]) )
  }

  pval.overall <- mean(abs(ATE.sim) >= abs(ATE.obs) )

  output1 <- data.frame("ATE", sum(n.obs), ATE.obs, pval.overall)
  return(output1)

}




## p-value for the test of null of no individual effect whatsoever 
##
## doesn't account for the blocking in the original randomization
## most useful for the analysis of the applier subpopulation
##
get.perm.pval2 <- function(data, outcome="accept", M=5001){
  
  ATE.obs <- mean(data[data$treat==1,outcome]) -
    mean(data[data$treat==0, outcome])

  ATE.sim <- rep(NA, M)

  for (iter in 1:M){
    n.treat <- sum(data$treat==1)
    n.control <- sum(data$treat==0)
    data$treat <- sample(c(rep(1, n.treat), rep(0, n.control)),
                                 replace=FALSE)
    treated.data <- na.omit(data[data$treat==1,outcome])
    control.data <- na.omit(data[data$treat==0,outcome])

    ATE.sim[iter] <- mean(treated.data) - mean(control.data)
  }
  
  pval.overall <- mean(abs(ATE.sim) >= abs(ATE.obs) )
  n.obs <- n.treat + n.control

  output <- data.frame("ATE", sum(n.obs), ATE.obs, pval.overall)
  return(output)
}



## p-value for the test of null that acceptance decisions are
## randomly allocated among appliers conditional on sex and tier of
## school (top10, top11-25, top26-50).
##
##
get.perm.pval3 <- function(data, outcome="accept", M=5001){
  
  ATE.obs <- mean(data[data$treat==1,outcome]) -
    mean(data[data$treat==0, outcome])

  ATE.sim <- rep(0, M)

  sex.vals <- unique(data$sex)
  tier.vals <- unique(data$tier)

  n.obs <- nrow(data)
  
  for (iter in 1:M){
    for (s in sex.vals){
      for (u in tier.vals){
        data.sub <- data[data$sex==s & data$tier==u,]
        
        n.treat <- sum(data.sub$treat==1)
        n.control <- sum(data.sub$treat==0)
        data.sub$treat <- sample(c(rep(1, n.treat), rep(0, n.control)),
                             replace=FALSE)
        treated.data <- na.omit(data.sub[data.sub$treat==1,outcome])
        control.data <- na.omit(data.sub[data.sub$treat==0,outcome])

        w <- (n.treat + n.control) / n.obs
        ATE.sim[iter] <- ATE.sim[iter]  + (mean(treated.data) -
                                           mean(control.data)) * w
      }
    }
  }
  
  pval.overall <- mean(abs(ATE.sim) >= abs(ATE.obs) )

  output <- data.frame("ATE", sum(n.obs), ATE.obs, pval.overall)
  return(output)
}

















## tabulate data (used to format data for robins.ci() )
tabulate.data <- function(data, outcome="accept"){
  sex.vals <- unique(data$sex)
  univ.vals <- unique(data$univ)

  univ <- rep(NA, length(univ.vals)*2)
  sex <- rep(NA, length(univ.vals)*2)
  X0Y0 <- rep(NA, length(univ.vals)*2)
  X0Y1 <- rep(NA, length(univ.vals)*2)
  X1Y0 <- rep(NA, length(univ.vals)*2)
  X1Y1 <- rep(NA, length(univ.vals)*2)
  n.treated <- rep(NA, length(univ.vals)*2)
  n.control <- rep(NA, length(univ.vals)*2)
  n.units <- rep(NA, length(univ.vals)*2)
  
  counter <- 1
  for (u in univ.vals){
    for (s in sex.vals){
      data.sub <- data[data$univ == u & data$sex == s,]
      treated.data <- na.omit(data.sub[data.sub$treat==1,outcome])
      control.data <- na.omit(data.sub[data.sub$treat==0,outcome])

      n.treated[counter] <- length(treated.data)
      n.control[counter] <- length(control.data)            
      n.units[counter] <- n.treated[counter] + n.control[counter]
      univ[counter] <- u
      sex[counter] <- s
      X0Y0[counter] <- sum(control.data==0)
      X0Y1[counter] <- sum(control.data==1)
      X1Y0[counter] <- sum(treated.data==0)
      X1Y1[counter] <- sum(treated.data==1)
      
      counter <- counter + 1
      }
  }
  
  return(data.frame(univ, sex, n.units, n.treated, n.control, X0Y0, X0Y1,
                    X1Y0, X1Y1, stringsAsFactors=FALSE))

}




















#########################################################################
#########################################################################
## restricting the sample to just those who applied
##
## analysis subclassifies on sex and tier of school to construct
## p-values for aggregate differences (full sample, all men, etc.)
###################################################################
## read data in and do some processing of the raw data
x <- read.csv("gradcontacts_treatedcontrol_import.csv",
              stringsAsFactors=FALSE)

names(x) <- c("univ", "sex", "ID", "treat", "applied", "accept",
              "app.sex", "app.race")

## restriction to just those who applied
x <- x[x$applied==1,]

## replace'2' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    2 indicates accepted for poster presentation)
x$accept[x$accept==2] <- 1

## replace'5' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    5 indicates accepted for paper presentation)
x$accept[x$accept==5] <- 1


## replace 9 as an outcome for accept --
## I used 9 to indicate someonw who didn't apply and couldn't
## be accepted when hand merging the datasets
x$accept[x$accept==9] <- 0


## attach the school rank data
univlist <- read.csv("university.list.csv",
                     stringsAsFactors=FALSE)
## clean up a bad entry
univlist <- univlist[1:53,]
## convert rank to numeric
univlist$u.rank <- as.numeric(univlist$u.rank)

x$Dept.Rank <- NULL
for (u in unique(x$univ)){
  x$Dept.Rank[x$univ == u] <- univlist$u.rank[univlist$univ == u]
}

x$tier <- ""
x$tier[x$Dept.Rank <= 10] <- "top10"
x$tier[x$Dept.Rank > 10 & x$Dept.Rank <=25] <- "top11.25"
x$tier[x$Dept.Rank > 25 & x$Dept.Rank <=50] <- "top26.50"








output.table <- data.frame(subgroup=c("full sample", "men", "women",
                             "top 10", "men top 10", "women top 10",
                             "top 11 to 25", "men top 11 to 25",
                             "women top 11 to 25",
                             "top 26 to 50", "men top 26 to 50",
                             "women top 26 to 50"), n=NA,
                           meanY.treated=NA, meanY.control=NA, pval=NA)


## overall effect
output.table$subgroup[1] <- "full sample"
pvals <- get.perm.pval3(x, "accept", M=nperm)
output.table$pval[1] <- pvals["pval.overall"]
output.table$meanY.treated[1] <- mean(x[x$treat==1,"accept"])
output.table$meanY.control[1] <- mean(x[x$treat==0,"accept"])
output.table$n[1] <- pvals["sum.n.obs."]

## effect for men
output.table$subgroup[2] <- "men"
pvals <- get.perm.pval3(x[x$sex=="Male",], "accept", M=nperm)
output.table$pval[2] <- pvals["pval.overall"]
output.table$meanY.treated[2] <- mean(x[x$treat==1 & x$sex=="Male","accept"])
output.table$meanY.control[2] <- mean(x[x$treat==0 & x$sex=="Male","accept"])
output.table$n[2] <- pvals["sum.n.obs."]


## effect for women
output.table$subgroup[3] <- "women"
pvals <- get.perm.pval3(x[x$sex=="Female",], "accept", M=nperm)
output.table$pval[3] <- pvals["pval.overall"]
output.table$meanY.treated[3] <- mean(x[x$treat==1 & x$sex=="Female","accept"])
output.table$meanY.control[3] <- mean(x[x$treat==0 & x$sex=="Female","accept"])
output.table$n[3] <- pvals["sum.n.obs."]




## effect for both men and women top-10 programs
top10 <- univlist$univ[univlist$u.rank <= 10]
output.table$subgroup[4] <- "top 10"
pvals <- get.perm.pval3(x[x$univ %in% top10,], "accept", M=nperm)
output.table$pval[4] <- pvals["pval.overall"]
output.table$meanY.treated[4] <- mean(x[x$treat==1 & x$univ %in% top10,"accept"])
output.table$meanY.control[4] <- mean(x[x$treat==0 & x$univ %in% top10,"accept"])
output.table$n[4] <- pvals["sum.n.obs."]



## effect for men in top-10 programs
output.table$subgroup[5] <- "men top 10"
pvals <- get.perm.pval3(x[x$sex=="Male" & x$univ %in% top10,], "accept",
                        M=nperm)
output.table$pval[5] <- pvals["pval.overall"]
output.table$meanY.treated[5] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$meanY.control[5] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$n[5] <- pvals["sum.n.obs."]


## effect for women in top-10 programs
output.table$subgroup[6] <- "women top 10"
pvals <- get.perm.pval3(x[x$sex=="Female" & x$univ %in% top10,],
                        "accept", M=nperm)
output.table$pval[6] <- pvals["pval.overall"]
output.table$meanY.treated[6] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$meanY.control[6] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$n[6] <- pvals["sum.n.obs."]






## effect for both men and women in 11-25 programs
top11.25 <- univlist$univ[univlist$u.rank <= 25 & univlist$u.rank >=11]
output.table$subgroup[7] <- "top 11 to 25"
pvals <- get.perm.pval3(x[x$univ %in% top11.25,], "accept", M=nperm)
output.table$pval[7] <- pvals["pval.overall"]
output.table$meanY.treated[7] <- mean(x[x$treat==1 & x$univ %in% top11.25,"accept"])
output.table$meanY.control[7] <- mean(x[x$treat==0 & x$univ %in% top11.25,"accept"])
output.table$n[7] <- pvals["sum.n.obs."]




## effect for men in 11-25 programs
output.table$subgroup[8] <- "men top 11 to 25"
pvals <- get.perm.pval3(x[x$sex=="Male" & x$univ %in% top11.25,], "accept",
                        M=nperm)
output.table$pval[8] <- pvals["pval.overall"]
output.table$meanY.treated[8] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[8] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$n[8] <- pvals["sum.n.obs."]




## effect for women in 11-25 programs
output.table$subgroup[9] <- "women top 11 to 25"
pvals <- get.perm.pval3(x[x$sex=="Female" & x$univ %in% top11.25,], "accept", M=nperm)
output.table$pval[9] <- pvals["pval.overall"]
output.table$meanY.treated[9] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[9] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$n[9] <- pvals["sum.n.obs."]



## effect for both men and women in 26-50 programs
top26.50 <- univlist$univ[univlist$u.rank <= 50 & univlist$u.rank >=26]
output.table$subgroup[10] <- "top 26 to 50"
pvals <- get.perm.pval3(x[x$univ %in% top26.50,], "accept", M=nperm)
output.table$pval[10] <- pvals["pval.overall"]
output.table$meanY.treated[10] <- mean(x[x$treat==1 & x$univ %in% top26.50,"accept"])
output.table$meanY.control[10] <- mean(x[x$treat==0 & x$univ %in% top26.50,"accept"])
output.table$n[10] <- pvals["sum.n.obs."]





## effect for men in 26-50 programs
output.table$subgroup[11] <- "men top 26 to 50"
pvals <- get.perm.pval3(x[x$sex=="Male" & x$univ %in% top26.50,],
                        "accept", M=nperm)
output.table$pval[11] <- pvals["pval.overall"]
output.table$meanY.treated[11] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[11] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$n[11] <- pvals["sum.n.obs."]




## effect for women in 26-50 programs
output.table$subgroup[12] <- "women top 26 to 50"
pvals <- get.perm.pval3(x[x$sex=="Female" & x$univ %in% top26.50,],
                        "accept", M=nperm)
output.table$pval[12] <- pvals["pval.overall"]
output.table$meanY.treated[12] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[12] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$n[12] <- pvals["sum.n.obs."]




save(output.table, file="AcceptSummary-Applied-Strat.Rda")




library(xtable)

print(xtable(output.table, display=c("s", "s", "d", "f", "f", "f"),
             digits=c(10, 10, 4, 3, 3, 3)),
      include.rownames=FALSE)









#########################################################################
#########################################################################
## restricting the sample to just those who applied
##
## Analysis does not subclassify on sex or tier of school to contruct
## p-values for aggregate quantities (full sample, all men, etc.)
###################################################################
## read data in and do some processing of the raw data
x <- read.csv("gradcontacts_treatedcontrol_import.csv",
              stringsAsFactors=FALSE)

names(x) <- c("univ", "sex", "ID", "treat", "applied", "accept",
              "app.sex", "app.race")

## restriction to just those who applied
x <- x[x$applied==1,]

## replace'2' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    2 indicates accepted for poster presentation)
x$accept[x$accept==2] <- 1

## replace'5' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    5 indicates accepted for paper presentation)
x$accept[x$accept==5] <- 1


## replace 9 as an outcome for accept and --
## I used 9 to indicate someonw who didn't apply and couldn't
## be accepted when hand merging the datasets
x$accept[x$accept==9] <- 0


## attach the school rank data
univlist <- read.csv("university.list.csv",
                     stringsAsFactors=FALSE)
## clean up a bad entry
univlist <- univlist[1:53,]
## convert rank to numeric
univlist$u.rank <- as.numeric(univlist$u.rank)

x$Dept.Rank <- NULL
for (u in unique(x$univ)){
  x$Dept.Rank[x$univ == u] <- univlist$u.rank[univlist$univ == u]
}










output.table <- data.frame(subgroup=c("full sample", "men", "women",
                             "top 10", "men top 10", "women top 10",
                             "top 11 to 25", "men top 11 to 25",
                             "women top 11 to 25",
                             "top 26 to 50", "men top 26 to 50",
                             "women top 26 to 50"), n=NA,
                           meanY.treated=NA, meanY.control=NA, pval=NA)


## overall effect
output.table$subgroup[1] <- "full sample"
pvals <- get.perm.pval2(x, "accept", M=nperm)
output.table$pval[1] <- pvals["pval.overall"]
output.table$meanY.treated[1] <- mean(x[x$treat==1,"accept"])
output.table$meanY.control[1] <- mean(x[x$treat==0,"accept"])
output.table$n[1] <- pvals["sum.n.obs."]

## effect for men
output.table$subgroup[2] <- "men"
pvals <- get.perm.pval2(x[x$sex=="Male",], "accept", M=nperm)
output.table$pval[2] <- pvals["pval.overall"]
output.table$meanY.treated[2] <- mean(x[x$treat==1 & x$sex=="Male","accept"])
output.table$meanY.control[2] <- mean(x[x$treat==0 & x$sex=="Male","accept"])
output.table$n[2] <- pvals["sum.n.obs."]


## effect for women
output.table$subgroup[3] <- "women"
pvals <- get.perm.pval2(x[x$sex=="Female",], "accept", M=nperm)
output.table$pval[3] <- pvals["pval.overall"]
output.table$meanY.treated[3] <- mean(x[x$treat==1 & x$sex=="Female","accept"])
output.table$meanY.control[3] <- mean(x[x$treat==0 & x$sex=="Female","accept"])
output.table$n[3] <- pvals["sum.n.obs."]




## effect for both men and women top-10 programs
top10 <- univlist$univ[univlist$u.rank <= 10]
output.table$subgroup[4] <- "top 10"
pvals <- get.perm.pval2(x[x$univ %in% top10,], "accept", M=nperm)
output.table$pval[4] <- pvals["pval.overall"]
output.table$meanY.treated[4] <- mean(x[x$treat==1 & x$univ %in% top10,"accept"])
output.table$meanY.control[4] <- mean(x[x$treat==0 & x$univ %in% top10,"accept"])
output.table$n[4] <- pvals["sum.n.obs."]



## effect for men in top-10 programs
output.table$subgroup[5] <- "men top 10"
pvals <- get.perm.pval2(x[x$sex=="Male" & x$univ %in% top10,], "accept",
                        M=nperm)
output.table$pval[5] <- pvals["pval.overall"]
output.table$meanY.treated[5] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$meanY.control[5] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$n[5] <- pvals["sum.n.obs."]


## effect for women in top-10 programs
output.table$subgroup[6] <- "women top 10"
pvals <- get.perm.pval2(x[x$sex=="Female" & x$univ %in% top10,],
                        "accept", M=nperm)
output.table$pval[6] <- pvals["pval.overall"]
output.table$meanY.treated[6] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$meanY.control[6] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$n[6] <- pvals["sum.n.obs."]






## effect for both men and women in 11-25 programs
top11.25 <- univlist$univ[univlist$u.rank <= 25 & univlist$u.rank >=11]
output.table$subgroup[7] <- "top 11 to 25"
pvals <- get.perm.pval2(x[x$univ %in% top11.25,], "accept", M=nperm)
output.table$pval[7] <- pvals["pval.overall"]
output.table$meanY.treated[7] <- mean(x[x$treat==1 & x$univ %in% top11.25,"accept"])
output.table$meanY.control[7] <- mean(x[x$treat==0 & x$univ %in% top11.25,"accept"])
output.table$n[7] <- pvals["sum.n.obs."]





## effect for men in 11-25 programs
output.table$subgroup[8] <- "men top 11 to 25"
pvals <- get.perm.pval2(x[x$sex=="Male" & x$univ %in% top11.25,], "accept",
                        M=nperm)
output.table$pval[8] <- pvals["pval.overall"]
output.table$meanY.treated[8] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[8] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$n[8] <- pvals["sum.n.obs."]




## effect for women in 11-25 programs
output.table$subgroup[9] <- "women top 11 to 25"
pvals <- get.perm.pval2(x[x$sex=="Female" & x$univ %in% top11.25,], "accept", M=nperm)
output.table$pval[9] <- pvals["pval.overall"]
output.table$meanY.treated[9] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[9] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$n[9] <- pvals["sum.n.obs."]



## effect for both men and women in 26-50 programs
top26.50 <- univlist$univ[univlist$u.rank <= 50 & univlist$u.rank >=26]
output.table$subgroup[10] <- "top 26 to 50"
pvals <- get.perm.pval2(x[x$univ %in% top26.50,], "accept", M=nperm)
output.table$pval[10] <- pvals["pval.overall"]
output.table$meanY.treated[10] <- mean(x[x$treat==1 & x$univ %in% top26.50,"accept"])
output.table$meanY.control[10] <- mean(x[x$treat==0 & x$univ %in% top26.50,"accept"])
output.table$n[10] <- pvals["sum.n.obs."]





## effect for men in 26-50 programs
output.table$subgroup[11] <- "men top 26 to 50"
pvals <- get.perm.pval2(x[x$sex=="Male" & x$univ %in% top26.50,],
                        "accept", M=nperm)
output.table$pval[11] <- pvals["pval.overall"]
output.table$meanY.treated[11] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[11] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$n[11] <- pvals["sum.n.obs."]




## effect for women in 26-50 programs
output.table$subgroup[12] <- "women top 26 to 50"
pvals <- get.perm.pval2(x[x$sex=="Female" & x$univ %in% top26.50,],
                        "accept", M=nperm)
output.table$pval[12] <- pvals["pval.overall"]
output.table$meanY.treated[12] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[12] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$n[12] <- pvals["sum.n.obs."]




save(output.table, file="AcceptSummary-Applied.Rda")




library(xtable)

print(xtable(output.table, display=c("s", "s", "d", "f", "f", "f"),
             digits=c(10, 10, 4, 3, 3, 3)),
      include.rownames=FALSE)














###########################################################################
###########################################################################
## All units
###################################################################
## read data in and do some processing of the raw data
x <- read.csv("gradcontacts_treatedcontrol_import.csv",
              stringsAsFactors=FALSE)

names(x) <- c("univ", "sex", "ID", "treat", "applied", "accept",
              "app.sex", "app.race")

## replace'2' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    2 indicates accepted for poster presentation)
x$accept[x$accept==2] <- 1

## replace'5' as an acceptance outcome -- the original dataset had three levels
## but only used two (0 indicates not accepted for any participation,
##                    5 indicates accepted for paper presentation)
x$accept[x$accept==5] <- 1

## replace 9 as an outcome for accept --
## I used 9 to indicate someonw who didn't apply and couldn't
## be accepted when hand merging the datasets
x$accept[x$accept==9] <- 0


## attach the school rank data
univlist <- read.csv("university.list.csv",
                     stringsAsFactors=FALSE)
## clean up a bad entry
univlist <- univlist[1:53,]
## convert rank to numeric
univlist$u.rank <- as.numeric(univlist$u.rank)

x$Dept.Rank <- NULL
for (u in unique(x$univ)){
  x$Dept.Rank[x$univ == u] <- univlist$u.rank[univlist$univ == u]
}











output.table <- data.frame(subgroup=c("full sample", "men", "women",
                             "top 10", "men top 10", "women top 10",
                             "top 11 to 25", "men top 11 to 25",
                             "women top 11 to 25",
                             "top 26 to 50", "men top 26 to 50",
                             "women top 26 to 50"), n=NA,
                           meanY.treated=NA, meanY.control=NA, pval=NA)


## overall effect
output.table$subgroup[1] <- "full sample"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm)
output.table$pval[1] <- pvals["pval.overall"]
output.table$meanY.treated[1] <- mean(x[x$treat==1,"accept"])
output.table$meanY.control[1] <- mean(x[x$treat==0,"accept"])
output.table$n[1] <- pvals["sum.n.obs."]


## effect for men
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[2] <- "men"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$sex=="Male")
output.table$pval[2] <- pvals["pval.overall"]
output.table$meanY.treated[2] <- mean(x[x$treat==1 & x$sex=="Male","accept"])
output.table$meanY.control[2] <- mean(x[x$treat==0 & x$sex=="Male","accept"])
output.table$n[2] <- pvals["sum.n.obs."]



## effect for women
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[3] <- "women"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$sex=="Female")
output.table$pval[3] <- pvals["pval.overall"]
output.table$meanY.treated[3] <- mean(x[x$treat==1 & x$sex=="Female","accept"])
output.table$meanY.control[3] <- mean(x[x$treat==0 & x$sex=="Female","accept"])
output.table$n[3] <- pvals["sum.n.obs."]




## effect for both men and women top-10 programs
top10 <- univlist$univ[univlist$u.rank <= 10]
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[4] <- "top 10"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top10)
output.table$pval[4] <- pvals["pval.overall"]
output.table$meanY.treated[4] <- mean(x[x$treat==1 & x$univ %in% top10,"accept"])
output.table$meanY.control[4] <- mean(x[x$treat==0 & x$univ %in% top10,"accept"])
output.table$n[4] <- pvals["sum.n.obs."]



## effect for men in top-10 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[5] <- "men top 10"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top10 &
                       ATE.s.hat$sex=="Male")
output.table$pval[5] <- pvals["pval.overall"]
output.table$meanY.treated[5] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$meanY.control[5] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top10,"accept"])
output.table$n[5] <- pvals["sum.n.obs."]



## effect for women in top-10 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[6] <- "women top 10"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top10 &
                       ATE.s.hat$sex=="Female")
output.table$pval[6] <- pvals["pval.overall"]
output.table$meanY.treated[6] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$meanY.control[6] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top10,"accept"])
output.table$n[6] <- pvals["sum.n.obs."]






## effect for both men and women in 11-25 programs
top11.25 <- univlist$univ[univlist$u.rank <= 25 & univlist$u.rank >=11]
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[7] <- "top 11 to 25"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top11.25)
output.table$pval[7] <- pvals["pval.overall"]
output.table$meanY.treated[7] <- mean(x[x$treat==1 & x$univ %in% top11.25,"accept"])
output.table$meanY.control[7] <- mean(x[x$treat==0 & x$univ %in% top11.25,"accept"])
output.table$n[7] <- pvals["sum.n.obs."]





## effect for men in 11-25 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[8] <- "men top 11 to 25"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top11.25 &
                       ATE.s.hat$sex=="Male")
output.table$pval[8] <- pvals["pval.overall"]
output.table$meanY.treated[8] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[8] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top11.25,"accept"])
output.table$n[8] <- pvals["sum.n.obs."]




## effect for women in 11-25 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[9] <- "women top 11 to 25"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top11.25 &
                       ATE.s.hat$sex=="Female")
output.table$pval[9] <- pvals["pval.overall"]
output.table$meanY.treated[9] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$meanY.control[9] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top11.25,"accept"])
output.table$n[9] <- pvals["sum.n.obs."]




## effect for both men and women in 26-50 programs
top26.50 <- univlist$univ[univlist$u.rank <= 50 & univlist$u.rank >=26]
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[10] <- "top 26 to 50"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top26.50)
output.table$pval[10] <- pvals["pval.overall"]
output.table$meanY.treated[10] <- mean(x[x$treat==1 & x$univ %in% top26.50,"accept"])
output.table$meanY.control[10] <- mean(x[x$treat==0 & x$univ %in% top26.50,"accept"])
output.table$n[10] <- pvals["sum.n.obs."]





## effect for men in 26-50 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[11] <- "men top 26 to 50"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top26.50 &
                       ATE.s.hat$sex=="Male")
output.table$pval[11] <- pvals["pval.overall"]
output.table$meanY.treated[11] <- mean(x[x$treat==1 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[11] <- mean(x[x$treat==0 & x$sex=="Male" & x$univ %in% top26.50,"accept"])
output.table$n[11] <- pvals["sum.n.obs."]




## effect for women in 26-50 programs
ATE.s.hat <- ATE.s(x, "accept")
output.table$subgroup[12] <- "women top 26 to 50"
tab.data <- tabulate.data(x, outcome="accept")
pvals <- get.perm.pval(x, "accept", M=nperm,
                       subset=ATE.s.hat$univ %in% top26.50 &
                       ATE.s.hat$sex=="Female")
output.table$pval[12] <- pvals["pval.overall"]
output.table$meanY.treated[12] <- mean(x[x$treat==1 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$meanY.control[12] <- mean(x[x$treat==0 & x$sex=="Female" & x$univ %in% top26.50,"accept"])
output.table$n[12] <- pvals["sum.n.obs."]




save(output.table, file="AcceptSummary-All.Rda")




library(xtable)

print(xtable(output.table, display=c("s", "s", "d", "f", "f", "f"),
             digits=c(10, 10, 4, 3, 3, 3)),
      include.rownames=FALSE)



